/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package decisionTree;

import java.io.FileNotFoundException;

/**
 * Utility that turns an article text file into a word-count vector laid out
 * like a given template {@code top100List}.
 *
 * @author hubert
 */
public class articleTextToWordVector {

    /** Input file used by {@link #main(String[])} when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT_PATH =
            "/home/hubert/hadoopStuff/sampleCodeRun/input/isCancer/isCancer/Astrocytoma.txt";

    /**
     * Builds a new list holding one entry per entry of {@code templateVector}
     * (same strings, same order, weights starting at 0.0) and then, for every
     * space-separated token of {@code inputFile}, adds 1.0 to the weight at the
     * index reported by {@code templateVector.indexOf(token)}.
     *
     * <p>NOTE(review): this relies on {@code top100List.indexOf} returning the
     * position of the word inside {@code getWordWeightList()} (negative when
     * absent) - confirm against {@code top100List}.
     *
     * <p>If the file cannot be opened, the stack trace is printed and the
     * all-zero vector is returned.
     *
     * @param inputFile      text file to read; lines are split on single spaces
     * @param templateVector list whose entries define the words to count
     * @return a new list with the template's strings and the accumulated weights
     */
    public static top100List convertText(java.io.File inputFile, top100List templateVector){
        top100List serve = new top100List();
        // Mirror the template so indices in 'serve' line up with templateVector.indexOf(...).
        for(stringWeightPair entry : templateVector.getWordWeightList()){
            serve.getWordWeightList().add(new stringWeightPair(entry.string, 0.0));
        }

        java.util.Scanner reader = null;
        try{
            reader = new java.util.Scanner(inputFile);
            while(reader.hasNextLine()){
                String cur = reader.nextLine();
                String[] wordsInCur = cur.split(" ");
                for(String wrd : wordsInCur){
                    int indx = templateVector.indexOf(wrd.trim());
                    if(indx >= 0){
                        stringWeightPair hit = serve.getWordWeightList().get(indx);
                        hit.weight = hit.weight + 1.0;
                    }
                }
            }
        }catch(FileNotFoundException e){
            // Best effort: report the problem and fall through with the zeroed vector.
            e.printStackTrace();
        }finally{
            // Release the underlying file handle; the original never closed the Scanner.
            if(reader != null){
                reader.close();
            }
        }

        return serve;
    }

    /**
     * Converts one article and prints the resulting vector to standard output.
     *
     * @param args optional; {@code args[0]} is the path of the article to
     *             convert. When absent, the built-in sample path is used.
     */
    public static void main(String args[]){
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        System.out.println(convertText(new java.io.File(path), new top100ListZScore()));
    }
}
